---
title: "PC_Analysis_Cleaned"
author: "Yingjie Fan"
date: "2023-09-15"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(
	echo = FALSE,
	message = FALSE,
	warning = FALSE,
	results = "asis"
)
rm(list=ls())
path = "/Users/yf5068/Dropbox/*Princeton/Research/PC/" #office laptop
#path = "/Users/yingjiefan/Library/CloudStorage/Dropbox/*Princeton/Research/PC/" #personal pc
```

```{r packages}
library(fst)
library(dplyr)
library(tidyr)
library(tidyverse)
library(ggplot2)
library(stringr)
library(zoo)
library(nlme)
library(stargazer)
library(lubridate)
library(openxlsx)
library(showtext)
library(broom)
#showtext_auto()
```

### Treatment Date:
1. Date: July 25, 2017
2. When weekly analysis: July 24, 2017 as Monday
3. When monthly analysis: August 1st, 2018 

```{r Load Data}
admin_66 = read.fst(paste0(path,"Data/ZJ_administrative_clean_66.fst")) %>%
  mutate(case_date = as.Date(case_date),
         case_month = as.yearmon(case_date),
         D=ifelse(case_date>as.Date("2017-07-25"),1,0),
         D_month=ifelse(case_date>as.Date("2017-07-31"),1,0))
admin_67 = read.fst(paste0(path,"Data/ZJ_administrative_clean_67.fst"))%>%
  mutate(case_date = as.Date(case_date),
         case_month = as.yearmon(case_date),
         D=ifelse(case_date>as.Date("2017-07-25"),1,0),
         D_month=ifelse(case_date>as.Date("2017-07-31"),1,0))
geo = read.fst(paste0(path,"Data/浙江卖淫嫖娼merged_geocoded.fst"))
crime = read.fst(paste0(path,"Data/national_criminal_clean_full.fst")) %>% 
  mutate(location_binary = ifelse(location_category=="Business","Business","Non-Business"))
```


```{r market difference before}
admin_before = geo %>% 
  filter(treat==0) %>% 
  group_by(case_date,case_month,department,arrest_long,location_binary) %>% 
  summarise(n=n())#the assumption here is that police goes to one place only once a day.

n_before_lm = lm(n ~ factor(location_binary), data = admin_before) 
n_before_plm <- plm(n ~ factor(location_binary), 
                    data = admin_before,
                    index = "department",
                    model = "within",
             vcov = "cluster")
summary(n_before_plm)

n_before_reg = stargazer(n_before_lm,n_before_plm, type = "latex", 
          #column.labels = c("Fairness","Baseline"),
          covariate.labels = c("Non-business Location"),
          #omit.stat = "all", 
          add.lines=list(c('Police Department Fixed Effect', 'No','Yes')),
          digits = 2)
sink(paste0(path,"/output/n_before_reg.tex"))
cat(n_before_reg, sep = "\n")
sink()
```



















### Criminal Data

```{r F1: Number of Criminal Arrested by Location}

summary = crime %>%
    mutate(prosecution_month = as.yearmon(prosecution_date))%>%
  filter(prosecution_date>as.Date("2017-07-31") & prosecution_date<as.Date("2018-02-01")) %>% 
  group_by(prosecution_month,location_binary)%>%
  summarise(n=n()) %>% 
  group_by(location_binary) %>% 
  summarise(count = mean(n))

national_org_irts_p = crime %>%
  mutate(prosecution_month = as.yearmon(prosecution_date))%>%
    mutate(D=ifelse(prosecution_date>as.Date("2017-07-31"),1,0))%>%
  group_by(prosecution_month,location_binary,D)%>%
  summarise(n=n())%>%
  filter(prosecution_month>as.yearmon("Jan 2014") & prosecution_month<as.yearmon("Dec 2020"))%>%
  filter(prosecution_month!=as.yearmon("Jul 2017"))%>%
  mutate(group = interaction(location_binary,D))%>%
  ggplot(aes(x=prosecution_month,y=n,color=location_binary))+
  geom_point()+
  geom_smooth(method = "lm",aes(group=group))+
    # geom_smooth(method = loess, color="Black", fill="Blue",linetype ="dotted",
    #             alpha = 0.1, aes(group=as.factor(group)))+ 
      geom_vline(xintercept = as.yearmon("Aug 2017"),linetype="dotted", 
                color = "blue")+
  theme_bw()+
  theme(legend.position = "bottom",text = element_text(size = 12))+
  labs(title="Number of Cases Prosecuted Pimps under Criminal Charges",x="Prosecuted Month",y="",color="Arrested Location")

national_org_irts_p
ggsave(paste0(path,"figs/national_org_irts_p.png"),national_org_irts_p,width=7, height=5)
```


```{r T2: IRTS Criminal Immediate Effect - Week, results='asis'}
cri_month_summary = crime %>%
    select(prosecution_date,location_category)%>%
  drop_na(prosecution_date) %>%
  filter(prosecution_date< as.Date("2020-01-01"))%>%
    mutate(prosecution_month = as.yearmon(prosecution_date))%>%
    filter(prosecution_month>as.yearmon("Jan 2014"))%>%
    filter(prosecution_month!=as.yearmon("Jul 2017"))%>%
      filter(prosecution_month!=as.yearmon("June 2017"))%>%
  mutate(D=ifelse(as.numeric(prosecution_month)>as.numeric(as.yearmon("July 2017")),1,0)) %>%
  group_by(prosecution_month,location_category,D)%>%
  summarise(n=n())%>%
    mutate(T=(as.numeric(prosecution_month)-as.numeric(as.yearmon("July 2017")))*12)





irts_lm<-lapply(split(cri_month_summary, factor(cri_month_summary$location_category)), function(x)lm(n ~ T + D + T*D, data=x,na.action = na.omit))
irts_gls<-lapply(split(cri_month_summary, factor(cri_month_summary$location_category)), function(x)gls(n ~ T + D + T*D, data=x,correlation = corAR1(form = ~ 1),na.action = na.omit))


output <- capture.output(stargazer(irts_lm$Business,irts_lm$`Not Business`,irts_gls$Business,irts_gls$`Not Business`,
           type = "latex", 
           dep.var.labels = ("Criminal: Organized Prostitutions Cases (month)"),
           column.labels = c("Business","Non-Business","Business","Non-Business"),
           # covariate.labels = c("Baseline Trend", "Level Change",
           #                      "Trend Change",
           #                      "Baseline Growth"),
           #omit.stat = "all", 
           digits = 2 ))

stargazer(irts_lm$Business,irts_lm$`Not Business`,irts_gls$Business,irts_gls$`Not Business`,
           type = "latex", 
           dep.var.labels = ("Criminal: Organized Prostitutions Cases"),
           column.labels = c("Business","Non-Business","Business","Non-Business"),
           # covariate.labels = c("Baseline Trend", "Level Change",
           #                      "Trend Change",
           #                      "Baseline Growth"),
           #omit.stat = "all", 
           digits = 2 )
sink(paste0(path,"output/month_criminal_cases.tex"))
cat(output, sep = "\n")
sink()

```


```{r F1: Number of Arrested Clients and Sex Workers by Location (Month)}
monthly_arrest_n = admin_66 %>% 
  filter(location_category=="Business"|location_category=="Private")%>%
  group_by(case_month,location_category,D_month)%>%
  summarise(n=n()) %>% 
  ggplot(aes(x=case_month,y=n,color=location_category))+
  geom_point()+
  geom_line()+
  geom_vline(xintercept = as.yearmon("August 2017"),linetype="dotted", 
                color = "blue")+
  theme(legend.position = "bottom",text = element_text(size=12))+
  labs(y="", x = "Arrested Month",title="Number of Monthly Arrested Prostitution Clients and Sex Workers",color="")
ggsave(paste0(path,"figs/monthly_arrest_n.png"),monthly_arrest_n,width=10, height=7)
monthly_arrest_n
```


```{r T.Test on number of arrested prostitutions}
# admin_66 %>% 
#   filter(location_category=="Business"|location_category=="Private")%>%
#   group_by(case_month,location_category,D_month)%>%
#   summarise(n=n()) 
# 
# 
# crime_admin_t = crime_admin %>% 
#   mutate(T=as.numeric(as.numeric(arrested_police_month)-as.numeric(as.yearmon("Jul 2017"))))%>%
#   filter(T<2 & T>=-1) %>% 
#   mutate(T_category = ifelse(T < 0,"1 Year Before Treatment",
#                            ifelse(T>=0 &T<1,"1 Year After Treatment",
#                              "2 Year After Treatment")))%>%
#   mutate(T_category = factor(T_category, levels = c("1 Year Before Treatment","1 Year After Treatment","2 Year After Treatment"))) %>% 
#   mutate(n_total = n_crime+n_admin,
#          n_business = Business_Admin+Business_Crime,
#          n_private = Private_Admin+Private_Crime)%>%
#   filter(T_category !="1 Year After Treatment")
# 
#   
# t.test(crime_admin_t$n_total[crime_admin_t$T_category=="1 Year Before Treatment"],crime_admin_t$n_total[crime_admin_t$T_category!="1 Year Before Treatment"])#fail to reject null
# 
# 
# t.test(crime_admin_t$n_business[crime_admin_t$T_category=="1 Year Before Treatment"],crime_admin_t$n_business[crime_admin_t$T_category!="1 Year Before Treatment"])#Businesss decreased *
# 
# t.test(crime_admin_t$n_private[crime_admin_t$T_category=="1 Year Before Treatment"],crime_admin_t$n_private[crime_admin_t$T_category!="1 Year Before Treatment"])#Businesss increased *

```

```{r F: Number of Arrested Clients and Sex Workers (Month)}
monthly_arrest_n = admin_66 %>% 
  filter(location_category=="Business"|location_category=="Private")%>%
  group_by(case_month,D_month)%>%
  summarise(n=n()) %>% 
  ggplot(aes(x=case_month,y=n))+
  geom_point()+
  geom_line()+
  geom_vline(xintercept = as.yearmon("August 2017"),linetype="dotted", 
                color = "blue")+
    coord_cartesian(xlim = c(as.yearmon("Jan 2016"), as.yearmon("Feb 2020")))+theme_bw()+
  theme(legend.position = "bottom",text = element_text(size=18))+
  labs(y="", x = "Arrested Month",title="Number of Monthly Prostitution Arrests",color="",subtitle = "Prostitution Clients and Sex Workers")
ggsave(paste0(path,"figs/monthly_arrest_n.png"),monthly_arrest_n,width=10, height=7)
monthly_arrest_n

```


```{r F2: Ratio of Arrested Clients and Sex Workers by Location (Month)}
monthly_arrest_pct = admin_66 %>% 
  filter(location_category=="Business"|location_category=="Private")%>%
    mutate(location_category=ifelse(location_category=="Private","Non-Business",location_category))%>%
  group_by(case_month,location_category,D_month)%>%
  summarise(n=n())%>%
  ungroup%>%
  group_by(case_month) %>%
  mutate(pct = n / sum(n)) %>% 
  ggplot(aes(x=case_month,y=100*pct,color=location_category,group=location_category))+
  geom_point()+geom_line()+
  geom_vline(xintercept = as.yearmon("August 2017"),linetype="dotted", 
                color = "blue")+
      coord_cartesian(xlim = c(as.yearmon("Jan 2016"), as.yearmon("Feb 2020")))+
    theme_bw()+
  theme(legend.position = "bottom",text = element_text(size = 18))+
  labs(y="Percentage of Arrest (%)", x = "Arrested Month",title="Ratio of Monthly Prostitution Arrests",color="Arrested Location",subtitle = "Prostitution Clients and Sex Workers") #title="Ratio of Monthly Arrested Prostitution Clients and Sex Workers"
ggsave(paste0(path,"figs/monthly_arrest_pct.png"),monthly_arrest_pct,width=10, height=7)
monthly_arrest_pct

monthly_arrest_pct_short = admin_66 %>% 
  filter(location_category=="Business"|location_category=="Private")%>%
  mutate(location_category=ifelse(location_category=="Private","Non-Business",location_category))%>%
    filter(as.Date(case_date)< as.Date("2018-04-01")) %>% 
  group_by(case_month,location_category,D_month)%>%
  summarise(n=n())%>%
  ungroup%>%
  group_by(case_month) %>%
  mutate(pct = n / sum(n)) %>% 
  ggplot(aes(x=case_month,y=100*pct,color=location_category,group=location_category))+
  geom_point()+geom_line()+
  geom_vline(xintercept = as.yearmon("August 2017"),linetype="dotted", 
                color = "blue")+
      coord_cartesian(xlim = c(as.yearmon("May 2017"), as.yearmon("Apr 2018")))+
    theme_bw()+
  theme(legend.position = "bottom",text = element_text(size = 18))+
  labs(y="Percentage of Arrest (%)", x = "Arrested Month",title="Ratio of Monthly Prostitution Arrests",color="Arrested Location",subtitle = "Prostitution Clients and Sex Workers") #title="Ratio of Monthly Arrested Prostitution Clients and Sex Workers"

ggsave(paste0(path,"figs/monthly_arrest_pct_short.png"),monthly_arrest_pct_short,width=10, height=7)
monthly_arrest_pct_short


```

```{r T1: IRTS of Arrested Clients and Sex Workers by Location(Date), results='asis'}
admin_analysis_d = admin_66 %>%
  filter(六十六==1)%>%
  filter(location_category!="Not Found")%>%
  group_by(case_date,location_category)%>%
  summarize(n=n())%>%
  ungroup()%>%
  group_by(case_date)%>%
  mutate(pct=round(100 * n/sum(n),2))%>%
  mutate(case_date= as.Date(case_date))%>%
  mutate(D=ifelse(case_date>"2017-07-25",1,0)) %>%
  mutate(T=as.numeric(case_date-as.numeric(as.Date("2017-07-25"))))

irts_lm<-lapply(split(admin_analysis_d, factor(admin_analysis_d$location_category)), function(x)lm(pct ~ T + D + T*D, data=x,na.action = na.omit))
irts_gls<-lapply(split(admin_analysis_d, factor(admin_analysis_d$location_category)), function(x)gls(pct ~ T + D + T*D, data=x,correlation = corAR1(form = ~ 1),na.action = na.omit))

output <- capture.output(stargazer(irts_lm$Business,irts_lm$`Private`,irts_gls$Business,irts_gls$Private,
           type = "latex", 
           dep.var.labels = ("Administrative: Solicitation Cases by Week"),
           column.labels = c("Business","Non-Business","Business","Non-Business"),
           # covariate.labels = c("Baseline Trend", "Level Change",
           #                      "Trend Change",
           #                      "Baseline Growth"),
           #omit.stat = "all", 
           digits = 2 ))

stargazer(irts_lm$Business,irts_lm$`Private`,irts_gls$Business,irts_gls$Private,
           type = "html", 
           dep.var.labels = ("Administrative: Solicitation Cases by Week"),
           column.labels = c("Business","Non-Business","Business","Non-Business"),
           # covariate.labels = c("Baseline Trend", "Level Change",
           #                      "Trend Change",
           #                      "Baseline Growth"),
           #omit.stat = "all", 
           digits = 2 )


output <- capture.output(stargazer(irts_lm$Business,irts_lm$`Private`,irts_gls$Business,irts_gls$Private,
           type = "latex", 
           dep.var.labels = ("Administrative: Solicitation Cases by Week"),
           column.labels = c("Business","Non-Business","Business","Non-Business"),
           # covariate.labels = c("Baseline Trend", "Level Change",
           #                      "Trend Change",
           #                      "Baseline Growth"),
           #omit.stat = "all", 
           digits = 2 ))

sink(paste0(path,"/output/admin_solicitation_cases.tex"))
cat(output, sep = "\n")
sink()
```

```{r F3: Number of Arrested Pimps by Location (Month)}
monthly_arrest_pimp_n = admin_67%>% 
  filter(location_category=="Business"|location_category=="Private")%>%
  group_by(case_month,location_category,D_month)%>%
  summarise(n=n()) %>% 
  ggplot(aes(x=case_month,y=n,color=location_category))+
  geom_point()+
  geom_line()+
  geom_vline(xintercept = as.yearmon("August 2017"),linetype="dotted", 
                color = "blue")+
  theme(legend.position = "bottom",text = element_text(size=12))+
  labs(y="", x = "Arrested Month",title="Number of Arrested Pimps under Administrative Penalties",color="")
ggsave(paste0(path,"figs/monthly_arrest_pimp_n.png"),monthly_arrest_pimp_n,width=10, height=7)
monthly_arrest_pimp_n

admin_67%>% 
  filter(location_category=="Business"|location_category=="Private")%>%
  filter(case_date>as.Date("2016-07-25") & case_date<as.Date("2019-07-25"))%>%
  mutate(T=as.numeric(case_date-as.numeric(as.Date("2017-07-25"))))%>%
  mutate(T_category = ifelse(T < 0,"1 Year Before Treatment",
                           ifelse(T>=0 &T<366,"1 Year After Treatment",
                             "2 Year After Treatment"))) %>% 
  group_by(case_month,T_category)%>%
  summarise(n=n()) %>% 
  ggplot(aes(x=case_month,y=n))+
  geom_point()+
  geom_line()+
  geom_vline(xintercept = as.yearmon("August 2017"),linetype="dotted", 
                color = "blue")+
  theme(legend.position = "bottom",text = element_text(size=12))+
  labs(y="", x = "Arrested Month",title="Number of Arrested Pimps under Administrative Penalties",color="")






```

```{r Price and Quantity}
admin_66 %>%
    filter(location_category=="Business"|location_category=="Private")%>%
  filter(case_date>as.Date("2016-07-25") & case_date<as.Date("2019-07-25"))%>%
  mutate(case_month= as.yearmon(case_date))%>%
  mutate(T=as.numeric(case_date-as.numeric(as.Date("2017-07-25"))))%>%
  mutate(T_category = ifelse(T < 0,"1 Year Before Treatment",
                           ifelse(T>=0 &T<366,"1 Year After Treatment",
                             "2 Year After Treatment")))%>%
  filter(T_category=="1 Year Before Treatment") %>% 
  mutate(T_category = factor(T_category, levels = c("1 Year Before Treatment","1 Year After Treatment","2 Year After Treatment")))%>%
  ggplot(aes(x=price))+
  geom_density(aes(color=location_category))+
  xlim(0,1500)+
  facet_wrap(~location_category)


admin_66 %>%
  filter(location_category=="Business"|location_category=="Private")%>%
  filter(case_date>as.Date("2016-07-25") & case_date<as.Date("2019-07-25"))%>%
  mutate(case_month= as.yearmon(case_date))%>%
  mutate(T=as.numeric(case_date-as.numeric(as.Date("2017-07-25"))))%>%
  mutate(T_category = ifelse(T < 0,"1 Year Before Treatment",
                           ifelse(T>=0 &T<366,"1 Year After Treatment",
                             "2 Year After Treatment")))%>%
  filter(T_category=="1 Year Before Treatment") %>%
  group_by(location_category) %>% 
  summarise(n=n(),mean = mean(price,na.rm = TRUE),median=median(price,na.rm = TRUE))
```

```{r}

```



```{r Ratio of Number of Arrested Clients vs Pimps under Administrative Penalties, eval=FALSE, include=FALSE}
monthly66 = admin_66 %>%
  filter(location_category=="Business"|location_category=="Private")%>%
  group_by(case_month,location_category)%>%
  summarize(n_66=n())
monthly67 = admin_67%>% 
  filter(location_category=="Business"|location_category=="Private")%>%
  group_by(case_month,location_category,D_month)%>%
  summarise(n_67=n())

monthly_admin = full_join(monthly66,monthly67) %>% 
  mutate(ratio = n_67/n_66)

monthly_admin %>% 
ggplot(aes(x=case_month,y=ratio,color=location_category,group=location_category))+
  geom_point()+
  geom_line()+
  geom_smooth()+
  geom_vline(xintercept = as.yearmon("August 2017"),linetype="dotted", 
                color = "blue")+
  theme(legend.position = "bottom",text = element_text(size=12))+
  labs(y="", x = "Arrested Month",title="Ratio of Pimps / Clients in Administrative Penalties",color="")
```

```{r Whether the mode of solicitation is different by location}
admin_66 %>% 
  group_by(case_month, tech, location_binary) %>%
  filter(tech =="online")%>%
  summarize(n = n())%>%
  ggplot(aes(x=case_month,y=n,color=location_binary,group=location_binary))+
  geom_point()+
  geom_smooth()+
  geom_vline(xintercept = as.yearmon("August 2017"),linetype="dotted", 
                color = "blue")+
  theme(legend.position = "bottom",text = element_text(size=12))+
  labs(y="", x = "",title="Solicitation via Internet",fill="Arrested Location")

admin_66 %>% 
  group_by(case_month,location_binary) %>%
  summarize(n_total = n()) %>% 
  left_join(admin_66 %>% 
  group_by(case_month, tech, location_binary) %>%
  filter(tech =="online")%>%
  summarise(n_tech=n())) %>% 
  drop_na(n_tech) %>% 
  select(-tech) %>% 
  pivot_longer(n_total:n_tech, names_to = "type",values_to = "n") %>% 
  ggplot(aes(x=case_month,y=n,color=type,group=type))+
  geom_point()+
  facet_wrap(~location_binary)+
  geom_smooth()+
  geom_vline(xintercept = as.yearmon("August 2017"),linetype="dotted", 
                color = "blue")

```



### Criminal and Administrative
```{r Administrative Arrest to Criminal Charges Percentage}
zj_crime = crime %>% 
  filter(province=="浙") %>% 
  mutate(arrested_police_date = as.Date(arrested_police_date,format = "%Y年%m月%d日"),
         arrested_police_month = as.yearmon(arrested_police_date))%>%
  group_by(arrested_police_month,location_category) %>% 
  summarise(n_crime=n()) %>% 
  mutate(location_category= ifelse(location_category !="Business","Private","Business")) %>% 
    pivot_wider(names_from = location_category,values_from = n_crime)


crime_time_gap = crime %>% 
  filter(province=="浙") %>% 
  mutate(arrested_police_date = as.Date(arrested_police_date,format = "%Y年%m月%d日"),
         time_gap = as.numeric(prosecution_date-arrested_police_date)) %>% 
  filter(time_gap>0)
mean(crime_time_gap$time_gap)/30#average time delay is 5.7 months

admin_n = admin_67 %>% 
  filter(location_category=="Business"|location_category=="Private")%>%
  group_by(case_month,location_category) %>%
  summarize(n_admin = n()) %>% 
  pivot_wider(names_from = location_category,values_from = n_admin) %>% 
  mutate(n_admin = Business + Private)

crime_admin = left_join(zj_crime,admin_n,by=c("arrested_police_month" = "case_month")) %>% 
  as.data.frame() %>% 
  rename(Business_Crime = Business.x, Business_Admin = Business.y,Private_Crime = Private.x, Private_Admin = Private.y) %>% 
  mutate(Private_Crime = replace_na(Private_Crime,0),
         n_crime = Business_Crime+Private_Crime,
         p_sa = (Business_Crime+Private_Crime)/(Business_Crime+Private_Crime+ Business_Admin+Private_Admin),
         p_b_sa = (Business_Crime)/(Business_Crime+ Business_Admin),
         p_nb_sa = (Private_Crime)/(Private_Crime+Private_Admin)) %>% 
  drop_na(Business_Admin) %>% 
  pivot_longer(p_b_sa:p_nb_sa,names_to = "market",values_to = "prosecution_prob")

### This figure illustrate the prosectution rate: how many administrative arrests will lead to criminal prosecutions. 

psb = crime_admin %>% 
  ggplot(aes(x=arrested_police_month,y=prosecution_prob,color=market))+
  geom_point()+
  geom_smooth()+
  scale_color_manual(
    values = c("p_b_sa" = "#F8766D", "p_nb_sa" = "#00BFC4"),  # Adjust colors as needed
    labels = c("p_b_sa" = expression(P[paste(s, "|", b)]), "p_nb_sa" = expression(P[paste(s, "|", nb)]))
  ) +
  coord_cartesian(xlim = c(as.yearmon("Jan 2016"), as.yearmon("Feb 2020")))+
  geom_vline(xintercept = as.yearmon("Dec 2017"),linetype="dotted", 
                color = "blue")+
  theme_bw()+
  theme(legend.position = "bottom",text = element_text(size=22))+
  labs(title = "Differential Effects of the Legal Changes",
         x="Arrest Month",y="",color="",caption = "Average Processing Time from Arrest to Prosecution is 5.7 month.")
#expression(paste("Probability of Pimps Being Prosecuted after Arrest ", P[paste(s, "|", b)] * " & " * P[paste(s, "|", nb)]))

ggsave(paste0(path,"figs/prob_prosecution_arrest.png"),psb,width=10, height=7)
```

```{r Regression on the number of arrested pimps }

crime_admin_t = crime_admin %>% 
  mutate(T=as.numeric(as.numeric(arrested_police_month)-as.numeric(as.yearmon("Jul 2017"))))%>%
  filter(T<2 & T>=-1) %>% 
  mutate(T_category = ifelse(T < 0,"1 Year Before Treatment",
                           ifelse(T>=0 &T<1,"1 Year After Treatment",
                             "2 Year After Treatment")))%>%
  mutate(T_category = factor(T_category, levels = c("1 Year Before Treatment","1 Year After Treatment","2 Year After Treatment"))) %>% 
  mutate(n_total = n_crime+n_admin,
         n_business = Business_Admin+Business_Crime,
         n_private = Private_Admin+Private_Crime)%>%
  filter(T_category !="1 Year After Treatment") %>% 
  mutate(D = ifelse(T<0,0,1))

lm_total = lm(n_total~D,data = crime_admin_t)
lm_b = lm(n_business~D,data = crime_admin_t)
lm_nb = lm(n_private~D,data = crime_admin_t)

stargazer(lm_total,lm_b,lm_nb,
          type = "latex", 
          dep.var.labels=c("Total Arrested Pimps","Business","Non-Business"))


output <- capture.output(stargazer(lm_total,lm_b,lm_nb,
           type = "latex", 
           dep.var.labels = ("Number of Arrested Pimps"),
           column.labels = c("Total","Business","Non-Business"),
           digits = 2 ))

sink(paste0(path,"/output/arrested_pimps.tex"))
cat(output, sep = "\n")
sink()

library(broom)

tidy(lm_total)
regression_output = tidy(lm_total) %>% 
  as.data.frame() %>% 
  mutate(sample = NA) %>% 
  bind_rows(tidy(lm_b)) %>% 
  bind_rows(tidy(lm_nb)) 
regression_output$sample = c("Total","Total","Business","Business","Non-Business","Non-Business")
regression_output = regression_output %>% 
  filter(term=="D")

coef.plot = ggplot(regression_output, aes(x = factor(sample,levels = c("Non-Business","Business","Total")), y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = estimate-qnorm(0.975)*std.error, ymax = estimate+qnorm(0.975)*std.error), width = 0) +
  geom_hline(aes(yintercept = 0), lty = 'dashed') +
  labs(x = "", y = "Total Number of Arrested Pimps", 
       title = "Displacement Effects") +
  coord_flip()+
  theme_bw()+
  theme(text = element_text(size=22))

ggsave(paste0(path,"figs/coef.plot.png"),coef.plot,width=10, height=7)
```

### Geographic Distribution

```{r Geographic Distribution Data Procession}
library(sf)
library(rnaturalearthdata)
zhejiang <- st_read(paste0(path,"data/Zhejiang/Zhejiang_county.shp"))
geoloc_2year = geo %>%
  filter(case_date>as.Date("2016-07-25") & case_date<as.Date("2019-07-25"))%>%
  mutate(case_month= as.yearmon(case_date))%>%
  #filter(str_detect(case_month,"Jun"))%>%
  mutate(T=as.numeric(case_date-as.numeric(as.Date("2017-07-25"))))%>%
  mutate(T_category = ifelse(T < 0,"1 Year Before Treatment",
                           ifelse(T>=0 &T<366,"1 Year After Treatment",
                             "2 Year After Treatment")))%>%
  mutate(T_category = factor(T_category, levels = c("1 Year Before Treatment","1 Year After Treatment","2 Year After Treatment"))) %>% 
  mutate(arrest = paste0("c(",arrest_lat,",",arrest_long,")"),
         police = paste0("c(",police_lat,",",police_long,")"))
```


```{r geolocation density plot}
geoloc_small<- geoloc_2year%>%
  drop_na(police_lat)%>%
  drop_na(arrest_lat) %>%  #68438 out of 97126 left!
  group_by(T_category,location_binary,police,arrest)%>%
  summarise(n=n()) %>% 
  mutate(police = gsub("c\\(|\\)", "", police)) %>%
  separate(police, into = c("police_lat", "police_long"), sep = ",",remove=FALSE) %>% 
  mutate(arrest = gsub("c\\(|\\)", "", arrest)) %>%
  separate(arrest, into = c("arrest_lat", "arrest_long"), sep = ",",remove=FALSE) 



geo_density_p <- geoloc_small %>%
  ggplot() +
  geom_sf(data = zhejiang, fill = "white", color = "black") +
  geom_point(aes(color = location_binary,x = as.numeric(arrest_long), y = as.numeric(arrest_lat), size = n), alpha = 0.03) +
  scale_size_continuous(range = c(1, 25), guide = "none") + 
  guides(color = guide_legend(title = NULL, override.aes = list(alpha = 1))) + 
  # geom_point(aes(x=as.numeric(police_long),y=as.numeric(police_lat)),color="black",size=1)+
  labs(title = "Arrest Location", x = "", y = "") + 
  facet_wrap(~factor(T_category)) +
  theme_bw() +
  theme(legend.position = "bottom",text = element_text(size=20))+
  scale_x_continuous(breaks = seq(118, 124, by = 2)) 

geo_density_p

ggsave(paste0(path,"figs/geo_density_p.png"),geo_density_p, width=10, height=7)

```

```{r}
nrow(admin_66)/(nrow(admin_67)+nrow(zj_crime))
```


```{r}
admin_price = admin_66 %>%
  select(case_date,case_month,price,location_binary)%>%
  drop_na(price)%>%
  filter(price<10000)



library(Rmisc)
admin_price_summary = summarySE(data = admin_price, measurevar = "price",groupvars = c("case_month","location_binary"),na.rm = T, conf.interval = 0.95)
ggplot(admin_price_summary, aes(x = case_month, y = price, color = location_binary,fill = location_binary)) +
  geom_ribbon(aes(ymin=price-ci, ymax=price+ci),alpha=0.32)+
    geom_line(position=position_dodge(0.1) ) +
    geom_point(position=position_dodge(0.1) )+
  labs(y="", x = "",title="嫖资",fill="Arrested Location")
```

```{r}

price_data <- admin_price %>%
  mutate(case_date = as.Date(case_date),
         year = year(case_date),
         month = month(case_date, label = TRUE),
         price = as.numeric(price))


# library(dplyr)
# monthly_price <- price_data %>% 
#   group_by(year, month) %>% 
#   summarise(
#     average_price = mean(price, na.rm = TRUE)
#   ) 

# price_data %>% 
#   count(year, month)
```


```{r}
admin_price = admin_66 %>%
  select(case_date,case_month,price,location_binary)%>%
  drop_na(price)%>%
  filter(price<10000)
price_data <- admin_price %>%
  mutate(case_date = as.Date(case_date),
         year = year(case_date),
         month = month(case_date, label = TRUE),
         price = as.numeric(price))

monthly_summary_location <- price_data %>%
  group_by(year, month, location_binary) %>%
  dplyr::summarize(
    count = n(),
    average_price = mean(price, na.rm = TRUE) 
  ) %>% 
  arrange(year, month, location_binary)%>%
  mutate(
    count_growth = (count - lag(count, 24)) / lag(count, 24),
    price_growth = (average_price - lag(average_price, 24)) / lag(average_price, 24)) %>% 
  drop_na() %>% 
    mutate(
    month_num = match(month, month.abb),
    month_year = make_date(year, month_num),
    count_growth_percent = count_growth * 100,
    price_growth_percent = price_growth * 100
  )




ggplot(monthly_summary_location, aes(x = month_year, y = count_growth_percent, group = location_binary, color = location_binary)) +
  geom_line() +
  labs(title = "Monthly Trend for Count Growth by Location",
       x = "Month-Year",
       y = "Count Growth") +
  theme_minimal() +
  theme(legend.position = "bottom")

business_data <- monthly_summary_location%>% filter(location_binary == "Business")

ggplot(business_data, aes(x = month_year)) +
  geom_line(aes(y = count_growth, color = "Count Growth")) +
  geom_line(aes(y = price_growth, color = "Price Growth")) +
  labs(title = "Monthly Trend for Business Location",
       x = "Month-Year",
       y = "Growth") +
  theme_minimal() +
  theme(legend.position = "bottom")

# ggsave("business_location_growth_trend.png", width = 10, height = 6)


non_business_data <- monthly_summary_location%>% filter(location_binary == "Non Business")

ggplot(non_business_data, aes(x = month_year)) +
  geom_line(aes(y = count_growth, color = "Count Growth")) +
  geom_line(aes(y = price_growth, color = "Price Growth")) +
  labs(title = "Monthly Trend for Non Business Location",
       x = "Month-Year",
       y = "Growth") +
  theme_minimal() +
  theme(legend.position = "bottom")

### USE PRICE AND COUNT NOT GROWTH
ggplot(business_data, aes(x = month_year)) +
  geom_line(aes(y = count/10, color = "Count")) +
  geom_line(aes(y = price_growth_percent, color = "Price Growth")) +
  labs(title = "Monthly Trend for Business Location",
       x = "Month-Year",
       y = "Growth") +
  theme_minimal() +
  theme(legend.position = "bottom")

# ggsave("business_location_growth_trend.png", width = 10, height = 6)




ggplot(non_business_data, aes(x = month_year)) +
  geom_line(aes(y = count_growth, color = "Count Growth")) +
  geom_line(aes(y = price_growth, color = "Price Growth")) +
  labs(title = "Monthly Trend for Non Business Location",
       x = "Month-Year",
       y = "Growth") +
  theme_minimal() +
  theme(legend.position = "bottom")



ggplot(business_data, aes(x = month_year)) +
  geom_line(aes(y = count, color = "Count Growth")) +
  geom_line(aes(y = price_growth * 100, color = "Price Growth")) +
  scale_y_continuous(name = "Count Growth",
                     sec.axis = sec_axis(~ . / 1000, name = "Price Growth")) +
  labs(title = "Monthly Trend for Business Location",
       x = "Month-Year",
       y = "Growth") +
  theme_minimal() +
  theme(legend.position = "bottom")





```



```{r}
yelp = read.csv(paste0(path,"Data/fengloug_20231027.csv"))%>% 
  filter(!is.na(prostitute_date)) %>% 
  mutate(prostitute_count = str_replace_all(prostitute_count, "人|个", "")) %>%
  rowwise() %>% 
  mutate(prostitute_date_new = clean_date(prostitute_date, publish_date)) %>% 
  mutate(price_new = clean_price(price)) %>% 
  mutate(prostitute_count_new = clean_count(prostitute_count)) %>% 
  ungroup()
```

